/******************************
This program estimates all the summary statistics 
for Panel A of Table 1 (Summary Statistics)
*******************************/
#delimit ;
/* From here on every command must end with a semicolon. */
set more off; 

/* Project-level includes.  Presumably config.do defines the globals used
   below ($date, $datadir, $supportdir) and drbvars.do defines the drbvars
   command called at the end of this file -- confirm against those files. */
include "./config.do" ;
include "./drbvars.do" ;

/* Close any log left open by an earlier run, then start a dated log. */
cap log close; 
log using "./00.01.sumstats_ba_$date.log", replace; 

/* Load the earnings panel. */
use "$datadir/all_earnings_long.dta", clear; 

/* Record the dataset layout in the log. */
des;

/* Cross-tabulate year_grad against year before any restrictions are applied. */
tab year_grad year;

/* NOTE(review): presumably applies the sample restrictions -- confirm in restrictions.do. */
include "./restrictions.do" ;

/* NOTE(review): presumably creates top_school, which is used for every
   group split below -- confirm in top_school.do. */
include "./top_school.do" ;

/* totally_missing: 1 when national_earnings is exactly 0.
   instate_missing: 1 when instate_earnings is exactly 0 but national_earnings is not 0.
   NOTE(review): a missing (.) value is not equal to 0, so a missing
   national_earnings gives totally_missing = 0 and satisfies the "!= 0"
   test -- confirm the earnings variables cannot be missing here. */
gen totally_missing  = (national_earnings == 0) ;
gen instate_missing = (instate_earnings == 0 & national_earnings != 0) ;

/* Natural log of total earnings; missing wherever total_earnings is not positive. */
gen log_earn_total = log(total_earnings);

/************************************************
Setting up a postfile

One row is posted per (variable, stat) pair:
  variable : name of the summarized variable (first column)
  stat     : "mean" or "sd" for the summary-statistic rows, or the
             year_post value ("1", "5", "10") for the missing-earnings rows
  all / top / other       : the statistic for the full sample, for
                            top_school == 1 and for top_school == 0
  all_N / top_N / other_N : the matching observation counts
************************************************/

/* Release the handle if an earlier run aborted before reaching postclose;
   otherwise postfile refuses to reuse the name "support". */
cap postclose support ;

/* Path is quoted, like the log/use paths above, so that a directory
   containing spaces does not break the command. */
postfile support str20 variable str4 stat all all_N top top_N other other_N 
                                using "$supportdir/support_sumstats_ba_$date.dta", replace;

/* Variables that get a "mean" row and an "sd" row in the table. */
local summarystats total_earnings national_earnings instate_earnings log_earn_total log_earn_national log_earn_instate
      		   male white black 
                   ;

foreach tabvar of local summarystats { ;

	/* Sample filter for each column group of the table. */
	local where_all   "" ;
	local where_top   "if top_school == 1" ;
	local where_other "if top_school == 0" ;

	/* Summarize once per group and keep mean, sd and N under
	   group-prefixed names (all_mean, top_sd, other_N, ...). */
	foreach grp in all top other { ;
		quietly summarize `tabvar' `where_`grp'' ;
		local `grp'_mean = r(mean) ;
		local `grp'_sd   = r(sd) ;
		local `grp'_N    = r(N) ;
	} ;

	/* Row 1: means together with the observation counts. */
	post support ("`tabvar'") ("mean") (`all_mean') (`all_N') (`top_mean') (`top_N') (`other_mean') (`other_N') ;

	/* Row 2: standard deviations; the count columns are left missing. */
	post support ("`tabvar'") ("sd") (`all_sd') (.) (`top_sd') (.) (`other_sd') (.) ;

} ;


/* Write the same statistics to the log, split by school group. */
tabstat `summarystats', statistics(mean sd N) by(top_school) ;

/* Two-way frequency tables of school group against each indicator variable. */
foreach tabvar in male white black flag_se { ;
	tabulate top_school `tabvar' ;
} ;



foreach tabvar in totally_missing instate_missing { ;
	foreach yrpost in 1 5 10 { ;

		display "***************** FOR YEAR POST: `yrpost' ******************" ;

		/* Sample filter for each column group, restricted to this year_post. */
		local where_all   "if year_post == `yrpost'" ;
		local where_top   "if top_school == 1 & year_post == `yrpost'" ;
		local where_other "if top_school == 0 & year_post == `yrpost'" ;

		/* sd is captured alongside mean and N but is not posted for these rows. */
		foreach grp in all top other { ;
			quietly summarize `tabvar' `where_`grp'' ;
			local `grp'_mean = r(mean) ;
			local `grp'_sd   = r(sd) ;
			local `grp'_N    = r(N) ;
		} ;

		/* One row per (variable, year_post); the stat column holds the year_post value. */
		post support ("`tabvar'") ("`yrpost'") (`all_mean') (`all_N') (`top_mean') (`top_N') (`other_mean') (`other_N') ;

	} ;
} ;

/* flag_se: post its mean and observation count for the full sample and
   for each school group as a single row.

   The stat column is labelled "mean".  It used to be filled from the
   yrpost macro, which belongs to the year_post loop above and says
   nothing about this row, so the label came out empty or stale. */
local tabvar flag_se ;

qui sum `tabvar' ;
local all_mean = r(mean) ;
local all_N = r(N) ;

qui sum `tabvar' if top_school == 1 ;
local top_mean = r(mean) ;
local top_N = r(N) ;

qui sum `tabvar' if top_school == 0 ;
local other_mean = r(mean) ;
local other_N = r(N) ;

post support ("`tabvar'") ("mean") (`all_mean') (`all_N') (`top_mean') (`top_N') (`other_mean') (`other_N') ;

/* Finish writing the posted rows to the support dataset. */
postclose support ;

/* Observation-level indicators for having positive earnings of each type.
   Stata treats a missing value as larger than any number, so a bare
   "x > 0" is true when x is missing; the !missing() guard keeps missing
   earnings from being counted as positive earnings. */
gen all = 1 ;
gen national = (national_earnings > 0 & !missing(national_earnings)) ;
gen state = (instate_earnings > 0 & !missing(instate_earnings)) ;
gen total = (total_earnings > 0 & !missing(total_earnings)) ;

preserve ;

/* One row per pik: an indicator is 1 if it was ever 1 for that pik. */
collapse (max) all national state total,by(pik) ;

foreach var in all national state total { ;
	tab `var' ;
} ; 

restore ;



/* Reduce the data to one row per (pik, top_school) pair.
   NOTE(review): this equals one row per person only if top_school is
   constant within pik -- confirm. */
collapse (first) national_earnings , by(pik top_school);

/* Save the row count right away: r() is overwritten by the next r-class
   command, and the r(N) left by tab leaves out rows where top_school is
   missing. */
count  ;
local n_persons = r(N) ;

tab top_school;

di " TOTAL COUNT OF UNIQUE PERSONS: " `n_persons' ;

/********************************************
This final part of the do-file reloads the summary-statistics file
created above, exports it unrounded, and then applies the drb rounding
rules (through drbvars, which is defined outside this file) before
exporting the final table.
************************************************/


use "$supportdir/support_sumstats_ba_$date.dta", clear ;

/* Unrounded support file, exported before any rounding is applied. */
outsheet using "$supportdir/support_sumstats_ba_$date.csv", comma replace ;

foreach tabvar in all top other { ;
	/* Round the statistic in place, and write the rounded count to rounded_<name>_N. */
	drbvars `tabvar', replace ;
	drbvars `tabvar'_N, counts(`tabvar'_N) gen(rounded_) ;

	/* Convert to strings so the values can be decorated for the table. */
	tostring `tabvar' rounded_`tabvar'_N, replace force;

	/* Add the leading zero that Stata omits (".5" -> "0.5", "-.5" -> "-0.5").
	   A lone "." is a missing value and stays as it is, so it is not
	   reported as "0.". */
	replace `tabvar' = "0" + `tabvar' if substr(`tabvar',1,1) == "." & `tabvar' != "." ;
	replace `tabvar' = "-0" + substr(`tabvar',2,.) if substr(`tabvar',1,2) == "-." ;

	/* Standard deviations are shown in parentheses. */
	replace `tabvar' = "(" + `tabvar' + ")" if stat == "sd"; 
} ;

/* Final table: rounded statistics and rounded counts only. */
outsheet variable stat all rounded_all_N top rounded_top_N other rounded_other_N using "$supportdir/sumstats_ba_$date.csv", comma replace ;

/* Close the log opened at the top of this do-file. */
log close ;


